In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
from impala.util import as_pandas
from impala.dbapi import connect
import cPickle as pickle
%matplotlib notebook
plt.style.use('ggplot')
In [ ]:
conn = connect(host="mycluster.domain.com", port=my_impala_port_number)
cur = conn.cursor()
cur.execute("use my_db")
List of medications/drugs which may be predictive: From clinical_event:
- "on_iv" - if the patient is on an iv drip, event_cd = 679984
- "bu-nal - is the patient taking buprenorphine and/or naloxone, event_cd= 2797130, 2797129, 2797129
- "dialysis" - is the patient on dialysis (event_cd = 186470117)
From orders table:
- "anticoagulants" - multum_category_id = 261, 262, 283, 285
- "narcotics" - multum_category_id = 60
- "narc-ans" - narcotic analgesic combinations; multum_category_id = 191
- "antipsychotics" - multum_category_id = 77, 210, 251, 341
- "chemo" - multum_category_id = 20, 21, 22, 23, 24, 25, 26
In [ ]:
pairs = [("'20','21','22','23','24','25','26'", 'chemo'), ("'60'", 'narcotics'), ("'191'", 'narc-ans'),
("'261','262','283','285'", 'anticoagulants'), ("'77', '210','251', '341'", 'antipsychotics') ]
count_tracker1_notrrt= []
for i in range(len(pairs)):
query1_notRRT = """
SELECT count(1)
FROM (
SELECT DISTINCT encntr_id
FROM (
SELECT
encntr_id
, cki
, substr(cki,9) as cki_id
, order_id FROM orders
) ords
LEFT OUTER JOIN mltm_category_drug_xref mdx
ON ords.cki_id = mdx.drug_identifier
LEFT OUTER JOIN mltm_drug_categories mdc
ON mdc.multum_category_id = mdx.multum_category_id
WHERE ords.encntr_id IN (
SELECT DISTINCT encntr_id
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_type_class_cd='391'
AND encntr_id NOT IN ( -- Add/remove "NOT" to select from or not from encounters from RRT events
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
)
)
AND mdx.multum_category_id IN ({0})
) AS t;
""".format(pairs[i][0])
cur.execute(query1_notRRT)
count = cur.fetchall()
count = count[0][0]
count_tracker1_notrrt.append((pairs[i][1], count))
In [ ]:
count_tracker2_notrrt= []
# strings = ['679984', '2797130', '2798305', '2797129', '186470117']
pairs = [('679984', 'on_iv'), ('2797130', 'bu-nal'), ('2797129', 'buprenorphine'),
('2798305', 'naloxone'), ('186470117', 'dialysis')]
for i in range(len(pairs)):
string = pairs[i][0]
query2_notRRT = """
SELECT count(1)
FROM (
SELECT DISTINCT ce.encntr_id
FROM clinical_event ce
LEFT OUTER JOIN code_value cv ON cv.code_value = ce.event_cd
WHERE ce.event_cd IN ('{0}')
AND ce.encntr_id IN (
SELECT DISTINCT encntr_id
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_type_class_cd='391'
AND encntr_id NOT IN (
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
)
)
) AS t;
""".format(string)
cur.execute(query2_notRRT)
count = cur.fetchall()
count = count[0][0]
count_tracker2_notrrt.append((pairs[i][1], count))
In [ ]:
pairs = [("'20','21','22','23','24','25','26'", 'chemo'), ("'60'", 'narcotics'), ("'191'", 'narc-ans'),
("'261','262','283','285'", 'anticoagulants'), ("'77', '210','251', '341'", 'antipsychotics') ]
count_tracker1_rrt= []
for i in range(len(pairs)):
query1_RRT = """
SELECT count(1)
FROM (
SELECT DISTINCT ords.encntr_id
FROM (
SELECT
encntr_id
, cki, substr(cki,9) as cki_id
, order_id FROM orders
) ords
LEFT OUTER JOIN mltm_category_drug_xref mdx
ON ords.cki_id = mdx.drug_identifier
LEFT OUTER JOIN mltm_drug_categories mdc
ON mdc.multum_category_id = mdx.multum_category_id
WHERE ords.encntr_id IN (
SELECT DISTINCT encntr_id
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_type_class_cd='391'
AND encntr_id IN (
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
)
)
AND mdx.multum_category_id IN ({0})
) AS t;
""".format(pairs[i][0])
cur.execute(query1_RRT)
count = cur.fetchall()
count = count[0][0]
count_tracker1_rrt.append((pairs[i][1], count))
In [ ]:
count_tracker2_rrt= []
pairs = [('679984', 'on_iv'), ('2797130', 'bu-nal'), ('2797129', 'buprenorphine'),
('2798305', 'naloxone'), ('186470117', 'dialysis')]
for i in range(len(pairs)):
string = pairs[i][0]
query2_RRT = """
SELECT count(1)
FROM (
SELECT DISTINCT ce.encntr_id
FROM clinical_event ce
LEFT OUTER JOIN code_value cv
ON cv.code_value = ce.event_cd
WHERE ce.event_cd IN ('{0}')
AND ce.encntr_id IN (
SELECT DISTINCT encntr_id
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_type_class_cd='391'
AND encntr_id IN (
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
)
)
) AS t;
""".format(string)
cur.execute(query2_RRT)
count = cur.fetchall()
count = count[0][0]
count_tracker2_rrt.append((pairs[i][1], count))
In [ ]:
# total number of complete, non-RRT encounters in Main Hospital:
query_num_notrrt = """
SELECT count(*)
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_id NOT IN (
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
);
"""
cur.execute(query_num_notrrt)
count_notrrt_enc = cur.fetchall()
count_notrrt_enc = float(count_notrrt_enc[0][0])
In [ ]:
count_notrrt_enc
In [ ]:
# total number of complete RRT encounters in Main Hospital:
query_num_rrt = """
SELECT count(*)
FROM encounter
WHERE loc_facility_cd='633867'
AND encntr_complete_dt_tm < 4e12
AND admit_type_cd != '0'
AND encntr_id IN (
SELECT DISTINCT encntr_id
FROM clinical_event
WHERE event_cd='54411998'
AND result_status_cd NOT IN ('31', '36')
AND valid_until_dt_tm > 4e12
AND event_class_cd not in ('654645')
);
"""
cur.execute(query_num_rrt)
count_rrt_enc = cur.fetchall()
count_rrt_enc = float(count_rrt_enc[0][0])
In [ ]:
count_rrt_enc
In [ ]:
count_tracker_notrrt = count_tracker1_notrrt+ count_tracker2_notrrt
count_tracker_rrt = count_tracker1_rrt + count_tracker2_rrt
In [ ]:
count_tracker_rrt
In [ ]:
count_tracker_notrrt
In [ ]:
rrt_df = pd.DataFrame.from_records(count_tracker_rrt, columns =[['medication', 'count']])
In [ ]:
nonrrt_df = pd.DataFrame.from_records(count_tracker_notrrt, columns =[['medication', 'count']])
In [ ]:
# A df with cols medication, count, and fraction_rrt/fraction_notrrt
rrt_df['fraction_rrt'] = rrt_df['count']/count_rrt_enc
nonrrt_df['fraction_notrrt'] = nonrrt_df['count']/count_notrrt_enc
In [ ]:
rrt_df
In [ ]:
nonrrt_df
In [ ]:
# We now have a handy way to compare the fraction of people with/without RRT events by meds
df = pd.concat([nonrrt_df[['medication','fraction_notrrt']], rrt_df['fraction_rrt']],axis=1)
df
In [ ]:
ind = np.arange(df.shape[0])
width = 0.35
In [ ]:
df = df.sort_values(by='fraction_rrt')
fig, ax = plt.subplots(figsize=(10,8))
bar_not_rrt = ax.barh(ind+width, df.fraction_notrrt, width, color = 'b')
bar_rrt = ax.barh(ind, df.fraction_rrt, width, color='r')
ax.set_yticks(ind+width)
ax.set_yticklabels((df.medication))
ax.set_xlim([0,1])
ax.tick_params(labelsize='14')
ax.set_xlabel('Fraction of patients in category taking that medication type', fontsize='14')
ax.legend(('Patients without RRT events', 'Patients with RRT events'), loc=4, fontsize='14')
plt.tight_layout()